Library and Data Read-In

# Load the TRI extract; readr's column-type message is silenced.
tri_data <- read_csv(file = "../data/tri_data.csv", show_col_types = FALSE)

# Column-type overview, restricted to the first 10,000 rows
# (presumably to keep the plot fast to render).
vis_dat(slice(tri_data, 1:10000))

# Missing-value overview for the same 10,000-row preview.
vis_miss(slice(tri_data, 1:10000))

Outline

Introduction

Introduction to the dataset and summary statistics. Introduce the primary research question: what is the relationship between industry sector and location, pollution, and chemical use, and has this relationship shifted over the past 10 years?

Location Analysis

Location of facilities, location trends, tribal lands, and proximity. Question: how many facilities are located on tribal land, and is there a significant difference in the type and amount of chemical release compared to facilities not on tribal land?

# One row per distinct combination of facility name, address, BIA/tribal
# fields and coordinates, plus a readable tribal-land indicator.
facility_location <- tri_data %>% 
  distinct(
    facility_name, street_address, city, county, state, zip_code,
    bia_code, tribal_land, latitude, longitude
  ) %>% 
  # Any non-missing tribal_land value is labelled "Tribal Land".
  mutate(
    tribal_yes_no = if_else(!is.na(tribal_land), "Tribal Land", "Not Tribal Land")
  )
# Number of unique facility names in the full dataset.
tri_data %>% 
  summarise(n = n_distinct(facility_name))
## # A tibble: 1 × 1
##       n
##   <int>
## 1 26267
# Distinct facility names per (standardized parent, reported parent) pair,
# largest first. summarise() keeps its default grouping, so the result stays
# grouped by standard_parent_co_name.
tri_data %>% 
  group_by(standard_parent_co_name, parent_co_name) %>% 
  summarise(number_of_facilities = length(unique(facility_name))) %>% 
  arrange(-number_of_facilities)
## `summarise()` has grouped output by 'standard_parent_co_name'. You can override
## using the `.groups` argument.
## # A tibble: 7,925 × 3
## # Groups:   standard_parent_co_name [5,483]
##    standard_parent_co_name       parent_co_name             number_of_facilities
##    <chr>                         <chr>                                     <int>
##  1 <NA>                          <NA>                                       6433
##  2 US DEPARTMENT OF DEFENSE      US DEPARTMENT OF DEFENSE                    324
##  3 CRH AMERICAS INC              CRH AMERICAS INC                            252
##  4 CEMEX INC                     CEMEX INC                                   208
##  5 ARGOS USA CORP                ARGOS USA CORP                              186
##  6 BERKSHIRE HATHAWAY INC        BERKSHIRE HATHAWAY INC                      186
##  7 CLEAN HARBORS INC             CLEAN HARBORS INC                           136
##  8 KOCH INDUSTRIES INC           KOCH INDUSTRIES INC                         121
##  9 MARTIN MARIETTA MATERIALS INC MARTIN MARIETTA MATERIALS…                  118
## 10 TYSON FOODS INC               TYSON FOODS INC                             110
## # ℹ 7,915 more rows
# Interactive clustered map with one marker per distinct facility record.
# Each popup shows facility, parent company, sector and "city, state" on
# separate lines. leaflet ignores rows with missing or invalid coordinates
# and reports them in a warning.
tri_data %>% 
  distinct(longitude, latitude, facility_name, parent_co_name, industry_sector, city, state) %>% 
  leaflet() %>% 
  addTiles() %>% 
  addCircleMarkers(
    lng = ~longitude,
    lat = ~latitude,
    popup = ~paste(
      facility_name, parent_co_name, industry_sector, paste0(city, ", ", state),
      sep = "<br>"
    ),
    clusterOptions = markerClusterOptions()
  )
## Warning in validateCoords(lng, lat, funcName): Data contains 2 rows with either
## missing or invalid lat/lon values and will be ignored
# Share of facility_location rows in each tribal-land category, in percent.
facility_location %>% 
  count(tribal_yes_no) %>% 
  mutate(percent = 100 * n / nrow(facility_location)) %>% 
  select(-n)
## # A tibble: 2 × 2
##   tribal_yes_no   percent
##   <chr>             <dbl>
## 1 Not Tribal Land  99.7  
## 2 Tribal Land       0.311

Industry Analysis

Industry sector trends (location, chemical use, pollution) and changes in industry sector prevalence. Question: is there a correlation between industry sector and the frequency of carcinogen, PFAS, and PBT use?

# Distinct combinations of sector, state, chemical classification flags and
# release/containment amounts. distinct() with explicit columns already drops
# every other column, so no separate select() is needed.
industry_info <- tri_data %>% 
  distinct(
    industry_sector, state, clean_air_act_chemical, carcinogen,
    metal_category, pbt, pfas, on_site_release_total,
    off_site_release_total, on_site_contained, off_site_contain
  )
#tri_data %>% 
 # select(industry_sector) %>% 
 # group_by(industry_sector) %>%
 # summarise(percent = 100 * n()/nrow(tri_data)) %>% 
 # arrange(desc(percent))

# Horizontal stacked bars: number of records per state, coloured by industry
# sector. fct_infreq() orders the states by how often they occur.
ggplot(tri_data, aes(y = fct_infreq(state), fill = industry_sector)) +
  geom_bar(color = "white", na.rm = TRUE)

Chemical Analysis

Chemical information (type, category, etc.) and trends in chemical use and disposal. Question: is there a correlation between location and the frequency of use of Clean Air Act chemicals?

# Lookup of distinct chemical / classification-flag combinations, sorted by
# chemical name.
chemical_info <- tri_data %>% 
  distinct(
    chemical, elemental_metal_included, clean_air_act_chemical,
    metal, metal_category, carcinogen, pbt, pfas
  ) %>% 
  arrange(chemical)
# Percent of all records accounted for by each chemical, most common first.
tri_data %>% 
  count(chemical) %>% 
  mutate(percent = 100 * n / nrow(tri_data)) %>% 
  select(-n) %>% 
  arrange(desc(percent))
## # A tibble: 627 × 2
##    chemical                                                              percent
##    <chr>                                                                   <dbl>
##  1 Lead                                                                     5.25
##  2 Lead compounds                                                           3.91
##  3 Zinc compounds                                                           3.79
##  4 Nickel                                                                   3.17
##  5 Copper                                                                   3.09
##  6 Chromium                                                                 3.05
##  7 Ammonia                                                                  2.88
##  8 Manganese                                                                2.85
##  9 Nitrate compounds (water dissociable; reportable only when in aqueou…    2.76
## 10 Methanol                                                                 2.70
## # ℹ 617 more rows
# Percent of all records by elemental_metal_included value, largest first.
tri_data %>% 
  count(elemental_metal_included) %>% 
  mutate(percent = 100 * n / nrow(tri_data)) %>% 
  select(-n) %>% 
  arrange(desc(percent))
## # A tibble: 2 × 2
##   elemental_metal_included percent
##   <chr>                      <dbl>
## 1 NO                         97.6 
## 2 YES                         2.37
# Percent of all records by clean_air_act_chemical value, largest first.
tri_data %>% 
  count(clean_air_act_chemical) %>% 
  mutate(percent = 100 * n / nrow(tri_data)) %>% 
  select(-n) %>% 
  arrange(desc(percent))
## # A tibble: 2 × 2
##   clean_air_act_chemical percent
##   <chr>                    <dbl>
## 1 YES                       62.6
## 2 NO                        37.4
# Percent of all records by metal value, largest first.
tri_data %>% 
  count(metal) %>% 
  mutate(percent = 100 * n / nrow(tri_data)) %>% 
  select(-n) %>% 
  arrange(desc(percent))
## # A tibble: 2 × 2
##   metal percent
##   <chr>   <dbl>
## 1 NO       61.7
## 2 YES      38.3
# Percent of all records in each metal_category, largest first.
tri_data %>% 
  count(metal_category) %>% 
  mutate(percent = 100 * n / nrow(tri_data)) %>% 
  select(-n) %>% 
  arrange(desc(percent))
## # A tibble: 6 × 2
##   metal_category                                   percent
##   <chr>                                              <dbl>
## 1 Non_Metal                                         53.9  
## 2 Metal complound categories                        20.6  
## 3 Elemental metals                                  18.7  
## 4 May contain metal                                  4.95 
## 5 Individually-listed compounds that contain metal   1.07 
## 6 Metals with qualifiers                             0.733
# Percent of all records by carcinogen value, largest first.
tri_data %>% 
  count(carcinogen) %>% 
  mutate(percent = 100 * n / nrow(tri_data)) %>% 
  select(-n) %>% 
  arrange(desc(percent))
## # A tibble: 2 × 2
##   carcinogen percent
##   <chr>        <dbl>
## 1 NO            71.6
## 2 YES           28.4
# Percent of all records by pbt value, largest first.
tri_data %>% 
  count(pbt) %>% 
  mutate(percent = 100 * n / nrow(tri_data)) %>% 
  select(-n) %>% 
  arrange(desc(percent))
## # A tibble: 2 × 2
##   pbt   percent
##   <chr>   <dbl>
## 1 NO       82.8
## 2 YES      17.2
# Percent of all records by pfas value, largest first.
tri_data %>% 
  count(pfas) %>% 
  mutate(percent = 100 * n / nrow(tri_data)) %>% 
  select(-n) %>% 
  arrange(desc(percent))
## # A tibble: 2 × 2
##   pfas   percent
##   <chr>    <dbl>
## 1 NO    100.    
## 2 YES     0.0203
# Chemicals flagged as carcinogens. The trailing group_by() makes the printed
# header report the number of distinct chemical names.
chemical_info %>% 
  filter(carcinogen == "YES") %>% 
  select(chemical, carcinogen) %>% 
  group_by(chemical)
## # A tibble: 187 × 2
## # Groups:   chemical [187]
##    chemical                    carcinogen
##    <chr>                       <chr>     
##  1 1,1,1,2-Tetrachloroethane   YES       
##  2 1,1,2,2-Tetrachloroethane   YES       
##  3 1,1-Dimethylhydrazine       YES       
##  4 1,2,3-Trichloropropane      YES       
##  5 1,2-Butylene oxide          YES       
##  6 1,2-Dibromo-3-chloropropane YES       
##  7 1,2-Dibromoethane           YES       
##  8 1,2-Dichloroethane          YES       
##  9 1,2-Dichloropropane         YES       
## 10 1,2-Diphenylhydrazine       YES       
## # ℹ 177 more rows
# Chemicals not flagged as carcinogens. The trailing group_by() makes the
# printed header report the number of distinct chemical names.
chemical_info %>% 
  filter(carcinogen == "NO") %>% 
  select(chemical, carcinogen) %>% 
  group_by(chemical)
## # A tibble: 441 × 2
## # Groups:   chemical [440]
##    chemical                                               carcinogen
##    <chr>                                                  <chr>     
##  1 1,1,1,2-Tetrachloro-2-fluoroethane (HCFC-121a)         NO        
##  2 1,1,1-Trichloroethane                                  NO        
##  3 1,1,2,2-Tetrachloro-1-fluoroethane (HCFC-121)          NO        
##  4 1,1,2,2-Tetrahydroperfluorodecyl acrylate              NO        
##  5 1,1,2,2-Tetrahydroperfluorododecyl acrylate            NO        
##  6 1,1,2,2-Tetrahydroperfluorohexadecyl acrylate          NO        
##  7 1,1,2,2-Tetrahydroperfluorotetradecyl acrylate         NO        
##  8 1,1,2-Trichloroethane                                  NO        
##  9 1,1-Dichloro-1,2,2,3,3-pentafluoropropane (HCFC-225cc) NO        
## 10 1,1-Dichloro-1-fluoroethane (HCFC-141b)                NO        
## # ℹ 431 more rows
# Chemicals flagged as PFAS. The trailing group_by() makes the printed header
# report the number of distinct chemical names.
chemical_info %>% 
  filter(pfas == "YES") %>% 
  select(chemical, pfas) %>% 
  group_by(chemical)
## # A tibble: 56 × 2
## # Groups:   chemical [56]
##    chemical                                                                pfas 
##    <chr>                                                                   <chr>
##  1 1,1,2,2-Tetrahydroperfluorodecyl acrylate                               YES  
##  2 1,1,2,2-Tetrahydroperfluorododecyl acrylate                             YES  
##  3 1,1,2,2-Tetrahydroperfluorohexadecyl acrylate                           YES  
##  4 1,1,2,2-Tetrahydroperfluorotetradecyl acrylate                          YES  
##  5 1-Decanol, 3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,10-heptadecafluoro-        YES  
##  6 1-Octanesulfonamide, 1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,8-heptadecafluoro… YES  
##  7 1-Octanesulfonamide, 1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,8-heptadecafluoro… YES  
##  8 1-Octanesulfonamide, N-butyl-1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,8-heptade… YES  
##  9 1-Propanaminium, 2-hydroxy-N,N,N-trimethyl-, 3-[(γ-ω-perfluoro-C6-20-a… YES  
## 10 1-Propanaminium, 3-amino-N-(carboxymethyl)-N,N-dimethyl-, N-[2-[(γ-ω-p… YES  
## # ℹ 46 more rows
# Chemicals not flagged as PFAS. The trailing group_by() makes the printed
# header report the number of distinct chemical names.
chemical_info %>% 
  filter(pfas == "NO") %>% 
  select(chemical, pfas) %>% 
  group_by(chemical)
## # A tibble: 572 × 2
## # Groups:   chemical [571]
##    chemical                                               pfas 
##    <chr>                                                  <chr>
##  1 1,1,1,2-Tetrachloro-2-fluoroethane (HCFC-121a)         NO   
##  2 1,1,1,2-Tetrachloroethane                              NO   
##  3 1,1,1-Trichloroethane                                  NO   
##  4 1,1,2,2-Tetrachloro-1-fluoroethane (HCFC-121)          NO   
##  5 1,1,2,2-Tetrachloroethane                              NO   
##  6 1,1,2-Trichloroethane                                  NO   
##  7 1,1-Dichloro-1,2,2,3,3-pentafluoropropane (HCFC-225cc) NO   
##  8 1,1-Dichloro-1-fluoroethane (HCFC-141b)                NO   
##  9 1,1-Dimethylhydrazine                                  NO   
## 10 1,2,3-Trichloropropane                                 NO   
## # ℹ 562 more rows
# Chemicals flagged as PBT. The trailing group_by() makes the printed header
# report the number of distinct chemical names.
chemical_info %>% 
  filter(pbt == "YES") %>% 
  select(chemical, pbt) %>% 
  group_by(chemical)
## # A tibble: 23 × 2
## # Groups:   chemical [23]
##    chemical                         pbt  
##    <chr>                            <chr>
##  1 Aldrin                           YES  
##  2 Benzo[g,h,i]perylene             YES  
##  3 Chlordane                        YES  
##  4 Dioxin and dioxin-like compounds YES  
##  5 Heptachlor                       YES  
##  6 Hexabromocyclododecane           YES  
##  7 Hexachlorobenzene                YES  
##  8 Isodrin                          YES  
##  9 Lead                             YES  
## 10 Lead  And Lead Compounds         YES  
## # ℹ 13 more rows
# Chemicals not flagged as PBT. The trailing group_by() makes the printed
# header report the number of distinct chemical names.
chemical_info %>% 
  filter(pbt == "NO") %>% 
  select(chemical, pbt) %>% 
  group_by(chemical)
## # A tibble: 605 × 2
## # Groups:   chemical [604]
##    chemical                                       pbt  
##    <chr>                                          <chr>
##  1 1,1,1,2-Tetrachloro-2-fluoroethane (HCFC-121a) NO   
##  2 1,1,1,2-Tetrachloroethane                      NO   
##  3 1,1,1-Trichloroethane                          NO   
##  4 1,1,2,2-Tetrachloro-1-fluoroethane (HCFC-121)  NO   
##  5 1,1,2,2-Tetrachloroethane                      NO   
##  6 1,1,2,2-Tetrahydroperfluorodecyl acrylate      NO   
##  7 1,1,2,2-Tetrahydroperfluorododecyl acrylate    NO   
##  8 1,1,2,2-Tetrahydroperfluorohexadecyl acrylate  NO   
##  9 1,1,2,2-Tetrahydroperfluorotetradecyl acrylate NO   
## 10 1,1,2-Trichloroethane                          NO   
## # ℹ 595 more rows
# Distinct combinations of year and the chemical classification columns,
# computed once and shared by the seven plots below.
# NOTE(review): because of distinct(), each bar counts unique flag
# combinations per year, not reporting records or chemicals -- confirm this
# is the intended measure.
chemical_flags_by_year <- tri_data %>% 
  distinct(
    year, elemental_metal_included, clean_air_act_chemical,
    metal, metal_category, carcinogen, pbt, pfas
  )

# Horizontal bar chart of one classification column, faceted by year.
#
# data: data frame containing `year` and the column named by `flag`.
# flag: name of the column to plot, as a string; it drives the axis, the
#       outline colour and the fill.
# Returns the ggplot object, which auto-prints when called at top level.
plot_flag_by_year <- function(data, flag) {
  ggplot(
    data,
    aes(x = .data[[flag]], color = .data[[flag]], fill = .data[[flag]])
  ) +
    geom_bar() +
    coord_flip() +
    facet_wrap(~year) +
    # Label with the bare column name, as a direct aes(x = <column>) would.
    labs(x = flag, color = flag, fill = flag)
}

plot_flag_by_year(chemical_flags_by_year, "elemental_metal_included")

plot_flag_by_year(chemical_flags_by_year, "clean_air_act_chemical")

plot_flag_by_year(chemical_flags_by_year, "metal")

plot_flag_by_year(chemical_flags_by_year, "metal_category")

plot_flag_by_year(chemical_flags_by_year, "carcinogen")

plot_flag_by_year(chemical_flags_by_year, "pbt")

plot_flag_by_year(chemical_flags_by_year, "pfas")

# Pie chart of the share of records in each metal_category: a single stacked
# column wrapped into polar coordinates, with each slice labelled by its
# proportion rounded to two decimals.
tri_data %>% 
  count(metal_category, name = "records") %>% 
  mutate(prop = records / nrow(tri_data)) %>% 
  ggplot(aes(x = "", y = prop, fill = fct_inorder(metal_category))) +
  geom_col(color = "white", linewidth = .3) +
  # x, y and fill are inherited from ggplot(); only the label is added here.
  geom_text(aes(label = round(prop, 2)), position = position_stack(vjust = 0.5)) +
  # Disabled alternative labelling:
  # geom_label_repel(aes(x = "", y = prop, label = paste0(prop, "%")), size = 4.5, nudge_x = 1, show.legend = FALSE) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "PiYG")

Pollution Analysis

Trends in pollution and on-site vs. off-site disposal. Question: what are the trends in pollution (chemical release) over the past 10 years?

# Year plus the release, transfer and containment columns used in the
# pollution analysis; column order matches the original selection.
pollution_info <- tri_data %>% 
  select(
    year,
    fugitive_air, stack_air, water,
    underground, underground_class_1, underground_class_2_through_5,
    landfills, land_treatment, surface_impoundment, other_disposal,
    on_site_release_total,
    public_treatment_total_transfer,
    off_site_release_total, off_site_recycled_total,
    off_site_energy_recovery_total, off_site_treated_total,
    total_transfer, total_releases, releases,
    on_site_contained, on_site_other,
    off_site_contain, off_site_other
  )
# Smoothed trend of fugitive_air against year; geom_smooth() picks its
# default method and announces it in a message.
ggplot(pollution_info, aes(x = year, y = fugitive_air)) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Placeholder: no aesthetics or layers are specified yet, so this draws an
# empty panel.
ggplot(data = pollution_info, mapping = aes())

Conclusion

Overview of the data and conclusions about pollution and chemical use trends.